{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 实现混淆矩阵，精准率和召回率"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "from sklearn import datasets"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "digits = datasets.load_digits()\n",
    "X = digits.data\n",
    "y = digits.target.copy()\n",
    "\n",
    "y[digits.target==9] = 1\n",
    "y[digits.target!=9] = 0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=666)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.97555555555555551"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn.linear_model import LogisticRegression\n",
    "\n",
    "log_reg = LogisticRegression()\n",
    "log_reg.fit(X_train, y_train)\n",
    "log_reg.score(X_test, y_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "y_log_predict = log_reg.predict(X_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "403"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def TN(y_true, y_predict):\n",
    "    assert len(y_true) == len(y_predict)\n",
    "    return np.sum((y_true == 0) & (y_predict == 0))\n",
    "\n",
    "TN(y_test, y_log_predict)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def FP(y_true, y_predict):\n",
    "    assert len(y_true) == len(y_predict)\n",
    "    return np.sum((y_true == 0) & (y_predict == 1))\n",
    "\n",
    "FP(y_test, y_log_predict)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "9"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def FN(y_true, y_predict):\n",
    "    assert len(y_true) == len(y_predict)\n",
    "    return np.sum((y_true == 1) & (y_predict == 0))\n",
    "\n",
    "FN(y_test, y_log_predict)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "36"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def TP(y_true, y_predict):\n",
    "    assert len(y_true) == len(y_predict)\n",
    "    return np.sum((y_true == 1) & (y_predict == 1))\n",
    "\n",
    "TP(y_test, y_log_predict)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[403,   2],\n",
       "       [  9,  36]])"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def confusion_matrix(y_true, y_predict):\n",
    "    return np.array([\n",
    "        [TN(y_true, y_predict), FP(y_true, y_predict)],\n",
    "        [FN(y_true, y_predict), TP(y_true, y_predict)]\n",
    "    ])\n",
    "\n",
    "confusion_matrix(y_test, y_log_predict)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.94736842105263153"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def precision_score(y_true, y_predict):\n",
    "    tp = TP(y_true, y_predict)\n",
    "    fp = FP(y_true, y_predict)\n",
    "    try:\n",
    "        return tp / (tp + fp)\n",
    "    except:\n",
    "        return 0.0\n",
    "    \n",
    "precision_score(y_test, y_log_predict)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.80000000000000004"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def recall_score(y_true, y_predict):\n",
    "    tp = TP(y_true, y_predict)\n",
    "    fn = FN(y_true, y_predict)\n",
    "    try:\n",
    "        return tp / (tp + fn)\n",
    "    except:\n",
    "        return 0.0\n",
    "    \n",
    "recall_score(y_test, y_log_predict)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### scikit-learn中的混淆矩阵，精准率和召回率"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[403,   2],\n",
       "       [  9,  36]])"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn.metrics import confusion_matrix\n",
    "\n",
    "confusion_matrix(y_test, y_log_predict)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.94736842105263153"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn.metrics import precision_score\n",
    "\n",
    "precision_score(y_test, y_log_predict)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.80000000000000004"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn.metrics import recall_score\n",
    "\n",
    "recall_score(y_test, y_log_predict)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
